library(readxl)
library(ggplot2)
library(corrplot)
## corrplot 0.95 loaded
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

数据预处理

导入数据

rm(list=ls())


tdata <- read_excel("../data/tuoye.xlsx", col_names = TRUE, row.names(TRUE) )
## New names:
## • `` -> `...1`
xdata <- read_excel("../data/xueqing.xlsx", col_names = TRUE, row.names(TRUE) )
## New names:
## • `` -> `...1`

数据清洗 正态性检验

检查正态性 Shapiro-Wilk 检验 p < 0.05证明不是正态分布

唾液数据

#创建因子列表

factors <- colnames(tdata)[3:(ncol(tdata)-3)]  

#对因子正态性逐个检验
normality <- data.frame(Factor = character(), Shapiro_p_value = numeric(), stringsAsFactors = FALSE)

# 遍历每个因子,进行Shapiro-Wilk检验
for (factor in factors) {
  test_result <- shapiro.test(tdata[[factor]])  # 对每个因子进行正态性检验
  normality <- rbind(normality, data.frame(Factor = factor, Shapiro_p_value = test_result$p.value))
}

normality
##    Factor Shapiro_p_value
## 1    MCP1     0.895041143
## 2    TNFa     0.597066226
## 3   CASP1     0.106085918
## 4     LDH     0.437689562
## 5  GM-CSF     0.267332512
## 6   MIP1a     0.009081237
## 7   HIF1a     0.027476154
## 8     ALP     0.180940921
## 9    BMP2     0.042243836
## 10  VEGFA     0.061479671
## 11   NTXI     0.006868238
## 12   COX2     0.011428387
## 13    MPO     0.239740903
## 14    IL6     0.109711119
## 15   IL17     0.034172419
## 16   IL1b     0.380964534
## 17   CTSK     0.115337539
## 18   MMP9     0.008590777
## 19 PECAM1     0.005958055
## 20    CAT     0.104453106
## 21  MMP-8     0.067476021
not_normal_tfactors <- subset(normality, Shapiro_p_value < 0.05)
not_normal_tfactors
##    Factor Shapiro_p_value
## 6   MIP1a     0.009081237
## 7   HIF1a     0.027476154
## 9    BMP2     0.042243836
## 11   NTXI     0.006868238
## 12   COX2     0.011428387
## 15   IL17     0.034172419
## 18   MMP9     0.008590777
## 19 PECAM1     0.005958055
write.csv(not_normal_tfactors, "../result/tdata_not_normal.csv", row.names = FALSE)

血清数据

#对因子正态性逐个检验
normality <- data.frame(Factor = character(), Shapiro_p_value = numeric(), stringsAsFactors = FALSE)

# 遍历每个因子,进行Shapiro-Wilk检验
for (factor in factors) {
  test_result <- shapiro.test(xdata[[factor]])  # 对每个因子进行正态性检验
  normality <- rbind(normality, data.frame(Factor = factor, Shapiro_p_value = test_result$p.value))
}

normality
##    Factor Shapiro_p_value
## 1    MCP1    0.0240963557
## 2    TNFa    0.0025059918
## 3   CASP1    0.0015234526
## 4     LDH    0.0156311860
## 5  GM-CSF    0.0238918342
## 6   MIP1a    0.0065652202
## 7   HIF1a    0.0041192518
## 8     ALP    0.0074922249
## 9    BMP2    0.0055832873
## 10  VEGFA    0.0299140426
## 11   NTXI    0.0016756912
## 12   COX2    0.0042097370
## 13    MPO    0.0077907045
## 14    IL6    0.0006423363
## 15   IL17    0.0097113058
## 16   IL1b    0.0021889971
## 17   CTSK    0.0016493802
## 18   MMP9    0.0120477205
## 19 PECAM1    0.0004854068
## 20    CAT    0.1358007572
## 21  MMP-8    0.0158732642
not_normal_xfactors <- subset(normality, Shapiro_p_value < 0.05)
not_normal_xfactors
##    Factor Shapiro_p_value
## 1    MCP1    0.0240963557
## 2    TNFa    0.0025059918
## 3   CASP1    0.0015234526
## 4     LDH    0.0156311860
## 5  GM-CSF    0.0238918342
## 6   MIP1a    0.0065652202
## 7   HIF1a    0.0041192518
## 8     ALP    0.0074922249
## 9    BMP2    0.0055832873
## 10  VEGFA    0.0299140426
## 11   NTXI    0.0016756912
## 12   COX2    0.0042097370
## 13    MPO    0.0077907045
## 14    IL6    0.0006423363
## 15   IL17    0.0097113058
## 16   IL1b    0.0021889971
## 17   CTSK    0.0016493802
## 18   MMP9    0.0120477205
## 19 PECAM1    0.0004854068
## 21  MMP-8    0.0158732642
write.csv(not_normal_xfactors, "../result/xdata_not_normal.csv", row.names = FALSE)

最终得到血清数据中除了CAT都是非正态数据 唾液数据中非正态数据包括:MIP1a, HIF1a, BMP2, NTXI, COX2, IL17, MMP9, PECAM1

相关性分析

正态分布数据使用Pearson 相关系数,非正态分布的数据使用Spearman 相关系数

BV/TV与唾液因子

相关性矩阵

# 正态分布因子
cor_matrix_BVTV_t <- cor(tdata[, c("BV/TV",  "MCP1" ,  "TNFa" ,  "CASP1" , "LDH" ,   "GM-CSF" ,  "ALP"   ,   "VEGFA"   , "MPO"  ,  "IL6" ,    "IL1b"  , "CTSK"   ,   "MMP-8",  "CAT"  )], use = "complete.obs", method = "pearson")

write.csv(cor_matrix_BVTV_t, "../result/cor_matrix/bvtv_t.csv",row.names = TRUE) 

# 非正态分布因子
cor_matrix_spearman_BVTV_t <- cor(tdata[, c("BV/TV",  "MIP1a" , "HIF1a" ,  "BMP2"  , "NTXI" ,  "COX2"   ,   "IL17"  , "MMP9" ,  "PECAM1" )], use = "complete.obs", method = "spearman")

write.csv(cor_matrix_spearman_BVTV_t, "../result/cor_matrix/bvtv_t_non_normal.csv", row.names = TRUE) 


tdata$Tb.Sp
##  [1] 0.2025 0.2051 0.2088 0.1444 0.2114 0.2678 0.3539 0.3950 0.3320 0.3524
## [11] 0.3554 0.4678 0.6661 0.6473 0.5995 0.5837 0.6609 0.6045

可视化

# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_BVTV_t, 
                         upper = "circle",         # 上半部分显示相关性图
                         lower = "number",         # 下半部分显示相关性系数
                         number.cex = 0.7,         # 控制数字大小
                         tl.col = "black",         # 标签颜色
                         tl.srt = 45,              # 标签旋转角度
                         tl.pos = "lt",             # 因子名字放在顶部
                         order = "hclust")         # 聚类排序

# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_spearman_BVTV_t, 
                         upper = "circle",         # 上半部分显示相关性图
                         lower = "number",         # 下半部分显示相关性系数
                         number.cex = 0.7,         # 控制数字大小
                         tl.col = "black",         # 标签颜色
                         tl.srt = 45,              # 标签旋转角度
                         tl.pos = "lt",             # 因子名字放在顶部
                         order = "hclust")         # 聚类排序

#corrplot(cor_matrix_spearman_BVTV_t, method = "circle", type = "upper",  addCoef.col = "black", order = #"hclust", tl.col = "black", tl.srt = 45)

相关性显著性检验

normal_tfactors <- c("MCP1", "TNFa", "CASP1", "LDH", "GM-CSF", "ALP", "VEGFA", "MPO", "IL6", "IL1b", "CTSK", "MMP-8", "CAT")

non_normal_tfactors <- c("MIP1a", "HIF1a", "BMP2", "NTXI", "COX2", "IL17", "MMP9", "PECAM1")



cor_test_normal_BVTV_t <- data.frame(Factor = character(), 
                              Pearson_p_value = numeric(), 
                              stringsAsFactors = FALSE)

# 对每个正态分布的因子进行 Pearson 相关性检验
for (factor in normal_tfactors) {
  test_result <- cor.test(tdata$`BV/TV`, tdata[[factor]], method = "pearson")
  cor_test_normal_BVTV_t <- rbind(cor_test_normal_BVTV_t, data.frame(Factor = factor, Pearson_p_value = test_result$p.value))
}





cor_test_non_normal_bvtv_t <- data.frame(Factor = character(), 
                                  Spearman_p_value = numeric(), 
                                  stringsAsFactors = FALSE)

# 对每个非正态分布的因子进行 Spearman 相关性检验
for (factor in non_normal_tfactors) {
  test_result <- cor.test(tdata$`BV/TV`, tdata[[factor]], method = "spearman")
  cor_test_non_normal_bvtv_t <- rbind(cor_test_non_normal_bvtv_t, data.frame(Factor = factor, Spearman_p_value = test_result$p.value))
}
## Warning in cor.test.default(tdata$`BV/TV`, tdata[[factor]], method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(tdata$`BV/TV`, tdata[[factor]], method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(tdata$`BV/TV`, tdata[[factor]], method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(tdata$`BV/TV`, tdata[[factor]], method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(tdata$`BV/TV`, tdata[[factor]], method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(tdata$`BV/TV`, tdata[[factor]], method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(tdata$`BV/TV`, tdata[[factor]], method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(tdata$`BV/TV`, tdata[[factor]], method =
## "spearman"): Cannot compute exact p-value with ties
cor_test_normal_BVTV_t
##    Factor Pearson_p_value
## 1    MCP1       0.9463708
## 2    TNFa       0.8909695
## 3   CASP1       0.9173662
## 4     LDH       0.8554642
## 5  GM-CSF       0.7574734
## 6     ALP       0.5222242
## 7   VEGFA       0.3835144
## 8     MPO       0.4025683
## 9     IL6       0.3422514
## 10   IL1b       0.7704817
## 11   CTSK       0.3104577
## 12  MMP-8       0.5253370
## 13    CAT       0.7109149
cor_test_non_normal_bvtv_t
##   Factor Spearman_p_value
## 1  MIP1a        0.9252868
## 2  HIF1a        0.8097502
## 3   BMP2        0.8160931
## 4   NTXI        0.9285272
## 5   COX2        0.8994083
## 6   IL17        0.4641971
## 7   MMP9        0.9707317
## 8 PECAM1        0.8736259
write.csv(cor_test_normal_BVTV_t, "../result/cor_test_bvtv_t.csv", row.names = FALSE)

write.table(cor_test_non_normal_bvtv_t, "../result/cor_test_bvtv_t.csv", append = TRUE, sep = ",", col.names = TRUE, row.names = FALSE)
## Warning in write.table(cor_test_non_normal_bvtv_t,
## "../result/cor_test_bvtv_t.csv", : appending column names to file

BV/TV与血清因子

cor_matrix_bvtv_x <- cor(tdata[, c("BV/TV", "CAT" )], use = "complete.obs", method = "pearson")

write.csv(cor_matrix_bvtv_x, "../result/cor_matrix/bvtv_x.csv",row.names = TRUE) 



# 血清因子除了CAT都为非正态分布
cor_matrix_spearman_BVTV_x <- cor(xdata[, c("BV/TV", factors[factors != "CAT"])], use = "complete.obs", method = "spearman")

write.csv(cor_matrix_spearman_BVTV_x, "../result/cor_matrix/bvtv_x_non_normal.csv", row.names = TRUE) 
corrplot(cor_matrix_bvtv_x, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)

corrplot(cor_matrix_spearman_BVTV_x, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)

# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_bvtv_x, 
                         upper = "circle",         # 上半部分显示相关性图
                         lower = "number",         # 下半部分显示相关性系数
                         number.cex = 0.7,         # 控制数字大小
                         tl.col = "black",         # 标签颜色
                         tl.srt = 45,              # 标签旋转角度
                         tl.pos = "lt",             # 因子名字放在顶部
                         order = "hclust")         # 聚类排序

# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_spearman_BVTV_x, 
                         upper = "circle",         # 上半部分显示相关性图
                         lower = "number",         # 下半部分显示相关性系数
                         number.cex = 0.7,         # 控制数字大小
                         tl.col = "black",         # 标签颜色
                         tl.srt = 45,              # 标签旋转角度
                         tl.pos = "lt",             # 因子名字放在顶部
                         order = "hclust")         # 聚类排序

Tb.sp与唾液因子

# 正态分布因子
cor_matrix_tbsp_t <- cor(tdata[, c("Tb.Sp",  "MCP1" ,  "TNFa" ,  "CASP1" , "LDH" ,   "GM-CSF" ,  "ALP"   ,   "VEGFA"   , "MPO"  ,  "IL6" ,    "IL1b"  , "CTSK"   ,   "MMP-8", "CAT" )], use = "complete.obs", method = "pearson")

write.csv(cor_matrix_tbsp_t, "../result/cor_matrix/tbsp_t.csv",row.names = TRUE) 

# 非正态分布因子
cor_matrix_spearman_tbsp_t <- cor(tdata[, c("Tb.Sp",  "MIP1a" , "HIF1a" ,  "BMP2"  , "NTXI" ,  "COX2"   ,   "IL17"  , "MMP9" ,  "PECAM1")], use = "complete.obs", method = "spearman")

write.csv(cor_matrix_spearman_tbsp_t, "../result/cor_matrix/tbsp_t_non_normal.csv", row.names = TRUE) 
corrplot(cor_matrix_tbsp_t, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)

corrplot(cor_matrix_spearman_tbsp_t, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)

# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_tbsp_t, 
                         upper = "circle",         # 上半部分显示相关性图
                         lower = "number",         # 下半部分显示相关性系数
                         number.cex = 0.7,         # 控制数字大小
                         tl.col = "black",         # 标签颜色
                         tl.srt = 45,              # 标签旋转角度
                         tl.pos = "lt",             # 因子名字放在顶部
                         order = "hclust")         # 聚类排序

# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_spearman_tbsp_t, 
                         upper = "circle",         # 上半部分显示相关性图
                         lower = "number",         # 下半部分显示相关性系数
                         number.cex = 0.7,         # 控制数字大小
                         tl.col = "black",         # 标签颜色
                         tl.srt = 45,              # 标签旋转角度
                         tl.pos = "lt",             # 因子名字放在顶部
                         order = "hclust")         # 聚类排序

Tb.Sp与血清因子

cor_matrix_tbsp_x <- cor(tdata[, c("Tb.Sp", "CAT" )], use = "complete.obs", method = "pearson")

write.csv(cor_matrix_tbsp_x, "../result/cor_matrix/tbsp_x.csv",row.names = TRUE) 



# 血清因子除了CAT都为非正态分布
cor_matrix_spearman_tbsp_x <- cor(xdata[, c("Tb.Sp", factors[factors != "CAT"])], use = "complete.obs", method = "spearman")

write.csv(cor_matrix_spearman_tbsp_x, "../result/cor_matrix/tbsp_x_non_normal.csv", row.names = TRUE) 
corrplot(cor_matrix_tbsp_x, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)

corrplot(cor_matrix_spearman_tbsp_x, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)

# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_tbsp_x, 
                         upper = "circle",         # 上半部分显示相关性图
                         lower = "number",         # 下半部分显示相关性系数
                         number.cex = 0.7,         # 控制数字大小
                         tl.col = "black",         # 标签颜色
                         tl.srt = 45,              # 标签旋转角度
                         tl.pos = "lt",             # 因子名字放在顶部
                         order = "hclust")         # 聚类排序

# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_spearman_tbsp_x, 
                         upper = "circle",         # 上半部分显示相关性图
                         lower = "number",         # 下半部分显示相关性系数
                         number.cex = 0.7,         # 控制数字大小
                         tl.col = "black",         # 标签颜色
                         tl.srt = 45,              # 标签旋转角度
                         tl.pos = "lt",             # 因子名字放在顶部
                         order = "hclust")         # 聚类排序

M1与唾液因子

# 正态分布因子
cor_matrix_m1_t <- cor(tdata[, c("M1",  "MCP1" ,  "TNFa" ,  "CASP1" , "LDH" ,   "GM-CSF" ,  "ALP"   ,   "VEGFA"   , "MPO"  ,  "IL6" ,    "IL1b"  , "CTSK"   ,   "MMP-8" , "CAT")], use = "complete.obs", method = "pearson")

write.csv(cor_matrix_m1_t, "../result/cor_matrix/m1_t.csv",row.names = TRUE) 

# 非正态分布因子
cor_matrix_spearman_m1_t <- cor(tdata[, c("M1",  "MIP1a" , "HIF1a" ,  "BMP2"  , "NTXI" ,  "COX2"   ,   "IL17"  , "MMP9" ,  "PECAM1" )], use = "complete.obs", method = "spearman")

write.csv(cor_matrix_spearman_m1_t, "../result/cor_matrix/m1_t_non_normal.csv", row.names = TRUE) 
corrplot(cor_matrix_m1_t, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)

corrplot(cor_matrix_spearman_m1_t, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)

# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_m1_t, 
                         upper = "circle",         # 上半部分显示相关性图
                         lower = "number",         # 下半部分显示相关性系数
                         number.cex = 0.7,         # 控制数字大小
                         tl.col = "black",         # 标签颜色
                         tl.srt = 45,              # 标签旋转角度
                         tl.pos = "lt",             # 因子名字放在顶部
                         order = "hclust")         # 聚类排序

# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_spearman_m1_t, 
                         upper = "circle",         # 上半部分显示相关性图
                         lower = "number",         # 下半部分显示相关性系数
                         number.cex = 0.7,         # 控制数字大小
                         tl.col = "black",         # 标签颜色
                         tl.srt = 45,              # 标签旋转角度
                         tl.pos = "lt",             # 因子名字放在顶部
                         order = "hclust")         # 聚类排序

Tb.Sp与血清因子

cor_matrix_m1_x <- cor(tdata[, c("M1", "CAT" )], use = "complete.obs", method = "pearson")

write.csv(cor_matrix_m1_x, "../result/cor_matrix/m1_x.csv",row.names = TRUE) 



# 血清因子除了CAT都为非正态分布
cor_matrix_spearman_m1_x <- cor(xdata[, c("M1", factors[factors != "CAT"])], use = "complete.obs", method = "spearman")

write.csv(cor_matrix_spearman_m1_x, "../result/cor_matrix/m1_x_non_normal.csv", row.names = TRUE) 
corrplot(cor_matrix_m1_x, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)

corrplot(cor_matrix_spearman_m1_x, method = "circle", type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)

# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_m1_x, 
                         upper = "circle",         # 上半部分显示相关性图
                         lower = "number",         # 下半部分显示相关性系数
                         number.cex = 0.7,         # 控制数字大小
                         tl.col = "black",         # 标签颜色
                         tl.srt = 45,              # 标签旋转角度
                         tl.pos = "lt",             # 因子名字放在顶部
                         order = "hclust")         # 聚类排序

# Create a mixed correlation plot with numbers and circles, with factor names at the top
corrplot::corrplot.mixed(cor_matrix_spearman_m1_x, 
                         upper = "circle",         # 上半部分显示相关性图
                         lower = "number",         # 下半部分显示相关性系数
                         number.cex = 0.7,         # 控制数字大小
                         tl.col = "black",         # 标签颜色
                         tl.srt = 45,              # 标签旋转角度
                         tl.pos = "lt",             # 因子名字放在顶部
                         order = "hclust")         # 聚类排序